Data preparation

load("XSTSF_production.RData")
source('functions.R')

Datasets

# Build the 'ct' focus-condition subset of the f0 data.
#   * norm_f0     : log(f0), z-scored within each speaker.
#   * time        : coerced to numeric.
#   * syllable_no : 1 for 0 < time < 11, 2 for 10 < time < 21,
#                   3 for 20 < time < 31, NA otherwise.
#   * sync_tone1-3: the label 'RF' is rewritten as 'LHL'.
f0_all_ct <- f0_all_pre %>%
  filter(focus_condition == 'ct') %>%
  group_by(speaker) %>%
  # scale() returns a one-column matrix carrying centre/scale attributes;
  # as.numeric() keeps the same z-scores but stores them as a plain numeric
  # column rather than a matrix column.
  mutate(norm_f0 = as.numeric(scale(log(f0)))) %>%
  ungroup() %>%
  mutate(
    time = as.numeric(time),
    syllable_no = case_when(
      time > 0 & time < 11 ~ 1,
      time > 10 & time < 21 ~ 2,
      time > 20 & time < 31 ~ 3
    ),
    sync_tone1 = ifelse(sync_tone1 == 'RF', 'LHL', sync_tone1),
    sync_tone2 = ifelse(sync_tone2 == 'RF', 'LHL', sync_tone2),
    sync_tone3 = ifelse(sync_tone3 == 'RF', 'LHL', sync_tone3)
  )

# Trisyllabic ('tri') items only, dropping rows whose sandhi tone is labelled
# 'outlier'. Adds combined tone labels for syllables 2 and 3, an abbreviated
# version of the historical label (yinping -> Ia, yangping -> Ib,
# yinshang -> IIa, yangshang -> IIb, yinqu -> IIIa, yangqu -> IIIb), a combined
# label with the first syllable's synchronic tone, and relabels the sandhi
# categories HHL -> MML and LHL -> MHL.
f0_tri_ct <- f0_all_ct %>%
  filter(diortri == 'tri', sandhi_tone != 'outlier') %>%
  mutate(
    sync_tone23 = paste0(sync_tone2, '_', sync_tone3),
    hist_tone23 = paste0(hist_tone2, '_', hist_tone3),
    # Substitutions are applied one after another, in the order listed.
    hist_tone23_mapped = hist_tone23 %>%
      gsub("yangqu", "IIIb", .) %>%
      gsub("yinqu", "IIIa", .) %>%
      gsub("yangshang", "IIb", .) %>%
      gsub("yinshang", "IIa", .) %>%
      gsub("yangping", "Ib", .) %>%
      gsub("yinping", "Ia", .),
    all_tone = paste(sync_tone1, hist_tone23_mapped, sep = "_"),
    sandhi_tone = case_when(
      sandhi_tone == 'HHL' ~ 'MML',
      sandhi_tone == 'LHL' ~ 'MHL',
      TRUE ~ sandhi_tone
    )
  )
  
# One subset of the trisyllabic data per historical tone of the first syllable.
f0_tri_ct_yp  <- filter(f0_tri_ct, hist_tone1 == 'yinping')
f0_tri_ct_yap <- filter(f0_tri_ct, hist_tone1 == 'yangping')
f0_tri_ct_ys  <- filter(f0_tri_ct, hist_tone1 == 'yinshang')
f0_tri_ct_yas <- filter(f0_tri_ct, hist_tone1 == 'yangshang')

Yinping-initial

Monosyllabic tones

# Axis and legend titles shared by every distribution plot in this section.
tone_axis_labels <- labs(x = "historical tone", fill = "synchronic tone")

# First syllable: historical vs. synchronic tone.
f0_tri_ct_yp %>%
  distri_prop(hist_tone1, sync_tone1, syntax) +
  tone_axis_labels

# Second syllable, historical tones whose label starts with "yin".
f0_tri_ct_yp %>%
  filter(startsWith(hist_tone2, "yin")) %>%
  distri_prop(hist_tone2, sync_tone2, syntax) +
  tone_axis_labels

# Third syllable, historical tones whose label starts with "yin".
f0_tri_ct_yp %>%
  filter(startsWith(hist_tone3, "yin")) %>%
  distri_prop(hist_tone3, sync_tone3, syntax) +
  tone_axis_labels

# Second syllable, historical tones whose label starts with "yang".
f0_tri_ct_yp %>%
  filter(startsWith(hist_tone2, "yang")) %>%
  distri_prop(hist_tone2, sync_tone2, syntax) +
  tone_axis_labels

# Third syllable, historical tones whose label starts with "yang".
f0_tri_ct_yp %>%
  filter(startsWith(hist_tone3, "yang")) %>%
  distri_prop(hist_tone3, sync_tone3, syntax) +
  tone_axis_labels

Sandhi categorisation

Auditory categorisation

# Distinct sandhi-tone labels present in the yinping-initial subset.
unique(f0_tri_ct_yp[["sandhi_tone"]])
## [1] "HLM" "MHL" "HML" "MMH" "MML"
# p_cluster() is defined in functions.R; here the data are grouped by the
# sandhi-tone label. The message echoed below is emitted by this call.
p_cluster(f0_tri_ct_yp, sandhi_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

k-means clustering

# data preparation
# Reshape to wide format: the columns not needed for clustering are dropped
# and each time point becomes its own column holding norm_f0.
f0_tri_ct_yp_kmeans <- f0_tri_ct_yp %>% 
  select(-diortri, -syllable_no, -focus_no, -f0) %>% 
  spread(time, norm_f0)

# k-means clustering
# k_means_clustering() is defined in functions.R; its result is the object
# that kml() partitions in place.
cluster_model <- k_means_clustering(f0_tri_ct_yp_kmeans)
# Fix the RNG state so the partitions found by kml() are the same on every
# run. The cluster letters are remapped by hard-coded rules further down, and
# those rules are only meaningful if the clustering itself is reproducible.
set.seed(1234)
kml(cluster_model, nbClusters = 2:10) 
##  ~ Fast KmL ~
## ***************************************************************************************************S
## 100 ********************************************************************************S
# Trajectories coloured by cluster for the 2- to 5-cluster solutions.
kml::plot(cluster_model, 2, parTraj=parTRAJ(col="clusters"))

kml::plot(cluster_model, 3, parTraj=parTRAJ(col="clusters"))

kml::plot(cluster_model, 4, parTraj=parTRAJ(col="clusters"))

kml::plot(cluster_model, 5, parTraj=parTRAJ(col="clusters"))

# Quality criteria across the candidate numbers of clusters.
plotAllCriterion(cluster_model)

# get cluster results
# Attach the 3- and 5-cluster assignments to the wide-format table.
f0_tri_ct_yp_kmeans <- f0_tri_ct_yp_kmeans %>%
  mutate(
    cluster3 = getClusters(cluster_model, 3),
    cluster5 = getClusters(cluster_model, 5)
  )

# Back to long format (wide_to_long() is defined in functions.R), then derive
# the syllable index as a character label and relabel the cluster letters.
# Letters without an entry in a lookup become NA.
f0_tri_ct_yp_kmeans <- wide_to_long(f0_tri_ct_yp_kmeans) %>%
  mutate(
    syllable_no = case_when(
      time < 11 ~ '1',
      time < 21 ~ '2',
      time >= 21 ~ '3'
    ),
    # Lookup is keyed by the original letter; the value is the new letter.
    cluster3_reorder = unname(
      c(B = 'A', C = 'B', A = 'C')[as.character(cluster3)]
    ),
    cluster5_reorder = unname(
      c(C = 'A', A = 'B', D = 'C', E = 'D', B = 'E')[as.character(cluster5)]
    )
  )

k-means cluster visualisation

# cluster3_reorder and cluster5_reorder already exist on f0_tri_ct_yp_kmeans:
# they are derived from cluster3 / cluster5 immediately after the
# wide-to-long reshape. They were recomputed here with an identical mapping,
# which was a no-op and left two copies of the mapping to keep in sync, so the
# recomputation is dropped.

# 3-cluster solution, grouped by the relabelled cluster letters.
p_kmeans3 <- p_cluster(f0_tri_ct_yp_kmeans, cluster3_reorder)
p_kmeans3

# 5-cluster solution, grouped by the relabelled cluster letters.
p_kmeans5 <- p_cluster(f0_tri_ct_yp_kmeans, cluster5_reorder)
p_kmeans5